/*
 * To change this template, choose Tools | Templates
 * and open the template in the editor.
 */
package maindatart.maindata;

import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.List;
import java.util.UUID;
import maindatart.metadata.MDMetaDataService;
import maindatart.metadata.MainDataMetaDataEntity;
import maindatart.rush.alg.MainDataRushAle;
import maindatart.rush.config.MainDataRashConfig;
import maindatart.rush.result.PendingDataEntity;
import maindatart.rush.result.RushReportEntity;
import maindatart.rush.result.RushResultService;

/**
 * Main data cleansing service.
 *
 * <p>Loads the copy ("carbon") data sets of a main data definition into the
 * main data store, compares records pairwise with {@link MainDataRushAle},
 * and records pairs whose similarity falls into the configured "same" or
 * "like" bands through {@link RushResultService}.
 *
 * @author Administrator
 */
public class MainDataRashService {

    /** Outcome of classifying one compared pair of records. */
    private enum MatchKind {
        NONE, SAME, LIKE
    }

    // Collaborators; kept package-private and non-final as in the original,
    // since other classes in this package may access or replace them.
    MDMetaDataService mdmdService = new MDMetaDataService();
    MainDataService mdservice = new MainDataService();
    RushResultService rrService = new RushResultService();

    /**
     * Cleanses main data incrementally: every record of every copy data set is
     * added to the main data store and compared against the main records held
     * in memory. After a copy data set has been processed, its records are
     * appended to the in-memory list, so later copy sets are also compared
     * against them (records of the same copy set are not compared with each
     * other).
     *
     * <p>A pair is logged as "same" when {@code cfg.getSame() <= score < 1}
     * and as "like" when {@code cfg.getDifferent() <= score < cfg.getSame()}.
     * Pairs scoring 1 or more, or below {@code cfg.getDifferent()}, are not
     * recorded.
     *
     * @param mainDataName name used to look up the main data metadata
     * @param cfg cleansing configuration supplying the thresholds and the
     *            algorithm id stored in the report
     * @return the report that was passed to {@code RushResultService.recordRush}
     * @throws Exception if any of the underlying services fails
     */
    public RushReportEntity rushData(String mainDataName, MainDataRashConfig cfg) throws Exception {

        RushReportEntity rre = new RushReportEntity();
        String rreId = UUID.randomUUID().toString();
        rre.setId(rreId);

        StringBuilder info = new StringBuilder();

        // Read the metadata.
        MainDataMetaDataEntity mdmd = mdmdService.getMainDataMetaDataByName(mainDataName);
        note(info, "读取元数据成功！");

        // Read the current main data; this step is logged to the report only.
        MainDataListBean datas = mdservice.getAllMainDatas(mdmd);
        info.append("读取主数据成功！");
        List<MainDataContener> maindatas = datas.getDatas();
        double p1 = cfg.getSame();
        double p2 = cfg.getDifferent();

        int carbonCount = mdmd.getCarbonDataCount();
        note(info, "检测到" + carbonCount + "个副本数据");
        int samecount = 0;
        int likecount = 0;

        for (int i = 1; i <= carbonCount; i++) {
            // Copy data sets are addressed by a zero-based index passed as a string.
            MainDataListBean copyBean = mdservice.getAllCorbonDatas(mdmd, Integer.toString(i - 1));
            List<MainDataContener> copyRecords = copyBean.getDatas();
            for (MainDataContener cd : copyRecords) {
                addToMainData(mdmd, cd);

                for (MainDataContener md : maindatas) {
                    MatchKind kind = compareAndRecord(md, cd, mdmd, cfg, p1, p2, rreId);
                    if (kind == MatchKind.SAME) {
                        samecount++;
                    } else if (kind == MatchKind.LIKE) {
                        likecount++;
                    }
                }
            }
            // Only now do this copy set's records become comparison targets.
            maindatas.addAll(copyRecords);
            note(info, "完成第" + i + "个副本数据清洗");
        }

        return finishReport(rre, info, samecount, likecount, mainDataName, cfg);
    }

    /**
     * Cleanses main data in bulk: first adds every record of every copy data
     * set to the main data store, then reloads all main data and compares each
     * unordered pair of records once.
     *
     * <p>The "same"/"like" bands are the same as in
     * {@link #rushData(String, MainDataRashConfig)}.
     *
     * @param mainDataName name used to look up the main data metadata
     * @param cfg cleansing configuration supplying the thresholds and the
     *            algorithm id stored in the report
     * @return the report that was passed to {@code RushResultService.recordRush}
     * @throws Exception if any of the underlying services fails
     */
    public RushReportEntity rushMainData(String mainDataName, MainDataRashConfig cfg) throws Exception {

        RushReportEntity rre = new RushReportEntity();
        String rreId = UUID.randomUUID().toString();
        rre.setId(rreId);

        StringBuilder info = new StringBuilder();

        // Read the metadata.
        MainDataMetaDataEntity mdmd = mdmdService.getMainDataMetaDataByName(mainDataName);
        note(info, "读取元数据成功！");

        // Import all copy data into the main data table first.
        int carbonCount = mdmd.getCarbonDataCount();
        note(info, "检测到" + carbonCount + "个副本数据");
        for (int i = 1; i <= carbonCount; i++) {
            MainDataListBean copyBean = mdservice.getAllCorbonDatas(mdmd, Integer.toString(i - 1));
            for (MainDataContener cd : copyBean.getDatas()) {
                addToMainData(mdmd, cd);
            }
        }
        note(info, "副本数据加入主数据表完毕");

        // Reload the main data, which now includes the imported copies;
        // this step is logged to the report only, together with the record count.
        MainDataListBean datas = mdservice.getAllMainDatas(mdmd);
        info.append("读取主数据成功！").append(datas.getDatas().size());

        double p1 = cfg.getSame();
        double p2 = cfg.getDifferent();
        // Number of pairs classified as "same".
        int samecount = 0;
        // Number of pairs classified as "like".
        int likecount = 0;

        List<MainDataContener> maindatas = datas.getDatas();

        for (int i = 0; i < maindatas.size(); i++) {
            MainDataContener earlier = maindatas.get(i);
            for (int j = i + 1; j < maindatas.size(); j++) {
                MainDataContener later = maindatas.get(j);
                // The later record is passed first, matching the original argument order.
                MatchKind kind = compareAndRecord(later, earlier, mdmd, cfg, p1, p2, rreId);
                if (kind == MatchKind.SAME) {
                    samecount++;
                } else if (kind == MatchKind.LIKE) {
                    likecount++;
                }
            }
        }

        return finishReport(rre, info, samecount, likecount, mainDataName, cfg);
    }

    /** Appends a progress message to the report text and echoes it to standard output. */
    private void note(StringBuilder info, String message) {
        info.append(message);
        System.out.println(message);
    }

    /** Wraps one copy record in a {@code MainDataBean} and adds it to the main data store. */
    private void addToMainData(MainDataMetaDataEntity mdmd, MainDataContener record) throws Exception {
        MainDataBean mb = new MainDataBean();
        mb.setMdmd(mdmd);
        mb.setMc(record);
        mdservice.add(mb);
    }

    /**
     * Scores one pair of records and, if the score falls into the "same" or
     * "like" band, logs the pair under the given rush id.
     *
     * @return which band the pair fell into, or {@code NONE} if nothing was logged
     */
    private MatchKind compareAndRecord(MainDataContener first, MainDataContener second,
            MainDataMetaDataEntity mdmd, MainDataRashConfig cfg,
            double p1, double p2, String rreId) throws Exception {
        double same = MainDataRushAle.same(first, second, mdmd, cfg);
        if (same < 1 && same >= p1) {
            PendingDataEntity pde = newPendingData(first, second, rreId, same);
            pde.setInfo("");
            rrService.addSameLog(pde);
            System.out.println("发现一个相同数据" + first.getValue("id") + "#" + second.getValue("id") + "#相似度：" + same);
            return MatchKind.SAME;
        }
        if (same < p1 && same >= p2) {
            // Unlike the "same" branch, the info field is left unset here, as in the original.
            PendingDataEntity pde = newPendingData(first, second, rreId, same);
            rrService.addLikeLog(pde);
            System.out.println("发现一个相似数据" + first.getValue("id") + "#" + second.getValue("id") + "#相似度：" + same);
            return MatchKind.LIKE;
        }
        return MatchKind.NONE;
    }

    /** Builds the pending-pair entity from the two records' "id" values, the rush id and the score. */
    private PendingDataEntity newPendingData(MainDataContener first, MainDataContener second,
            String rreId, double same) {
        PendingDataEntity pde = new PendingDataEntity();
        pde.setMaindataID1((String) first.getValue("id"));
        pde.setMaindataID2((String) second.getValue("id"));
        pde.setRushID(rreId);
        pde.setCompareValue(same);
        return pde;
    }

    /**
     * Appends the summary line, fills in the remaining report fields
     * (info text, algorithm id, main data name, timestamp) and persists the report.
     *
     * @return the same report instance, after it has been recorded
     */
    private RushReportEntity finishReport(RushReportEntity rre, StringBuilder info,
            int samecount, int likecount, String mainDataName, MainDataRashConfig cfg) throws Exception {
        note(info, "数据清洗完毕。检测到相同数据" + samecount + "个" + ",相似数据" + likecount + "个");
        rre.setInfo(info.toString());
        Date now = new Date();
        // The timestamp format can be changed here.
        SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy/MM/dd HH:mm:ss");
        rre.setAlgName(cfg.getId());
        rre.setMainDataName(mainDataName);
        rre.setTime(dateFormat.format(now));

        rrService.recordRush(rre);

        return rre;
    }

    /** Intentionally empty entry point, retained from the original source. */
    public static void main(String[] args) {

    }
}
